Each row in MathAchieve represents each of the 7185 students within 160 schools. Each row in MathAchSchool represents each of 160 schools.

[http://www.stat.rutgers.edu/home/yhung/Stat586/Mixed%20model/appendix-mixed-models.pdf]

The variables that we are going to use: - School - SES - MathAch - Sector - MEANSES

data("MathAchieve")
summary(MathAchieve)
##      School     Minority       Sex            SES               MathAch      
##  2305   :  67   No :5211   Male  :3390   Min.   :-3.758000   Min.   :-2.832  
##  5619   :  66   Yes:1974   Female:3795   1st Qu.:-0.538000   1st Qu.: 7.275  
##  4292   :  65                            Median : 0.002000   Median :13.131  
##  8857   :  64                            Mean   : 0.000143   Mean   :12.748  
##  4042   :  64                            3rd Qu.: 0.602000   3rd Qu.:18.317  
##  3610   :  64                            Max.   : 2.692000   Max.   :24.993  
##  (Other):6795                                                                
##     MEANSES         
##  Min.   :-1.188000  
##  1st Qu.:-0.317000  
##  Median : 0.038000  
##  Mean   : 0.006138  
##  3rd Qu.: 0.333000  
##  Max.   : 0.831000  
## 
attach(MathAchieve)
mses <- tapply(SES, School, mean)
detach(MathAchieve)
data("MathAchSchool")
head(MathAchSchool)
##      School Size   Sector PRACAD DISCLIM HIMINTY MEANSES
## 1224   1224  842   Public   0.35   1.597       0  -0.428
## 1288   1288 1855   Public   0.27   0.174       0   0.128
## 1296   1296 1719   Public   0.32  -0.137       1  -0.420
## 1308   1308  716 Catholic   0.96  -0.622       0   0.534
## 1317   1317  455 Catholic   0.95  -1.694       1   0.351
## 1358   1358 1430   Public   0.25   1.535       0  -0.014

Since the MEANSES is different in MathAchieve and MathAchSchool, the new mean of SES for students in each school is generated.

Bryk <- as.data.frame(MathAchieve[,c("School", "SES", "MathAch")])
Bryk$meanses <- mses[as.character(Bryk$School)]
sector <- MathAchSchool$Sector
names(sector) <- row.names(MathAchSchool)
Bryk$Sector <- sector[as.character(Bryk$School)]
contrasts(Bryk$Sector)
##          Catholic
## Public          0
## Catholic        1
ggplot(MathAchieve, aes(SES, MathAch)) + geom_point() + facet_wrap(~School)

fit <- lmer(MathAch ~ SES + Sector + (1|School), data = Bryk)

N <- nrow(Bryk) 
school <- unique(as.character(Bryk$School))
nschool <- length(school)

y <-  Bryk$MathAch
X <- model.matrix(~ SES + Sector, data = Bryk)
beta <- fixef(fit)
Z <- model.matrix(~ School - 1, data = Bryk)
u <- ranef(fit)$School[,1]
q <- length(u)
vc <- as.data.frame(VarCorr(fit))
R <- vc$vcov[vc$grp == "Residual"] * diag(N)
G <- vc$vcov[vc$grp == "School"] * diag(q)
Sigma <- Z %*% G %*% t(Z) + R
Sigma_inv <- solve(Sigma)
varMargRes <- Sigma - X %*% solve(t(X) %*% solve(Sigma) %*% X) %*% t(X)
P <- Sigma_inv - Sigma_inv %*% X %*% solve(t(X) %*% Sigma_inv %*% X) %*% t(X) %*% Sigma_inv
varCondRes <- R %*% P %*% R
varU <- G - G %*% t(Z) %*% P %*% Z %*% G
fit_bryk <- Bryk %>% 
  mutate(fitted = as.vector(X %*% beta),
         resm = MathAch - fitted, 
         resc = MathAch - fitted - as.vector(Z %*% u),
         index = 1:n()) %>% 
  rowwise() %>% 
  mutate(resm_std = resm / sqrt(diag(varMargRes)[index]),
         resc_std = resc / sqrt(diag(varCondRes)[index])) %>% 
  ungroup()

Lesaffre and Verbeke comment that when the within-unit covariance structure is adequate, \(V_i = ||I_{m_i} - \varepsilon_i \varepsilon^\top||^2\), where \(\varepsilon_i = \hat \Omega ^{-1/2} \hat \xi\) should be close to zero. However, Singer consider replacing \(\varepsilon_i\) in \(V_i\) with the standardised marginal residuals \(\hat \xi_i^* = [\hat V(\hat \xi_i)]^{-1/2} \hat \xi_i\), where \(V(\hat \xi_i)\) refers to the diagonal block of \(\Omega - X(X^\top \Omega^{-1} X)^{-1} X^\top\) associated to the \(i\) unit.

unit_df <- expand_grid(school) %>% 
  # cannot follow Vi quit well
  mutate(Vi = map_dbl(school, ~{
                        ind <- Bryk %>% 
                          mutate(index = 1:n()) %>% 
                          filter(School == .x) %>% 
                          pull(index)
                        mi <- length(ind)
                        # standardised marginal residual
                        Ei <- solve(expm::sqrtm(varMargRes[ind, ind])) %*% fit_bryk$resm[ind]
                        sqrt(sum((diag(mi) - Ei %*% t(Ei))^2)) / mi
                      }),
         Mi = as.vector(t(u) %*% solve(varU) %*% u)
         )

Diagnostic Plot 1: Linearity of fixed effects

ggplot(fit_bryk, aes(fitted, resm_std)) +
  geom_hline(yintercept = 0) +
  geom_point()

Diagnostic Plot 2: Prescence of outlying observations

ggplot(fit_bryk, aes(index, resm_std)) + geom_point() + geom_hline(yintercept = 0)

Diagnostic Plot 3: Within-units covariance matrix

ggplot(unit_df, aes(1:nrow(unit_df), Vi)) + geom_point()+ xlab("Unit indicies") + ylab("Modified Lesaffre-Verbeke index")

Diagnostic Plot 4: Prescense of outlying observations using conditional residual

ggplot(fit_bryk, aes(index, resc_std)) + geom_point() + geom_hline(yintercept = 0)

Diagnostic Plot 5: Homoskedasticity of conditional errors

ggplot(fit_bryk, aes(fitted, resc_std)) + geom_point() + geom_hline(yintercept = 0)

Diagnostic Plot 6: Normality of conditional errors

ggplot(fit_bryk, aes(sample = resc_std)) + 
  geom_qq() + geom_qq_line(color = "red")

Diagnostic Plot 7: Presencse of outlying subjects

ggplot(unit_df, aes(1:nrow(unit_df), Mi)) + geom_point() + xlab("Unit index") + ylab("Manhalanobis distance")

Diagnostic Plot 8: Normality of the random effects

ggplot(unit_df, aes(sample = Mi)) + 
  geom_qq(distribution = stats::qchisq,
          dparams = list(df = q)) +
  geom_qq_line(color = "red",
               distribution = stats::qchisq,
               dparams = list(df = q))

fit.sim  <- simulate(fit, nsim = 19, seed = 1234)
fit.refit <- lapply(fit.sim, refit, object = fit)
fit.simy <- lapply(fit.refit, function(x) getME(x, "y"))
fit.sim.y <- do.call("cbind", fit.simy)
fit.sim.y <- reshape2::melt(fit.sim.y)[-1]
names(fit.sim.y) <- c(".n", "y")
fit.sim.y$.n <- as.numeric(str_extract(fit.sim.y$.n, "\\d+"))
fit.sim.y$School <- rep(Bryk$School, 19)
fit.sim.y$SES <- rep(Bryk$SES, 19)
fit.sim.y$Sector <- rep(Bryk$Sector, 19)
ggplot(MathAchieve, aes(Sex, MathAch)) +
  geom_beeswarm(alpha = .2) +
  facet_grid(. ~Minority)

length(unique(MathAchieve$School))
## [1] 160

160 schools with the MEANSES is the nummeric vector of the mean SES for the school.

df <- MathAchieve %>% mutate(Sex = ifelse(Sex == "Male", 0, 1), Minority = ifelse(Minority == "No", 0, 1))
str(df)
## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame':   7185 obs. of  6 variables:
##  $ School  : Ord.factor w/ 160 levels "8367"<"8854"<..: 59 59 59 59 59 59 59 59 59 59 ...
##  $ Minority: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Sex     : num  1 1 0 0 0 0 1 0 1 0 ...
##  $ SES     : num  -1.528 -0.588 -0.528 -0.668 -0.158 ...
##  $ MathAch : num  5.88 19.71 20.35 8.78 17.9 ...
##  $ MEANSES : num  -0.428 -0.428 -0.428 -0.428 -0.428 -0.428 -0.428 -0.428 -0.428 -0.428 ...
##  - attr(*, "formula")=Class 'formula'  language MathAch ~ SES | School
##   .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
##  - attr(*, "labels")=List of 2
##   ..$ y: chr "Mathematics Achievement score"
##   ..$ x: chr "Socio-economic score"
##  - attr(*, "FUN")=function (x)  
##   ..- attr(*, "source")= chr "function (x) max(x, na.rm = TRUE)"
##  - attr(*, "order.groups")= logi TRUE
model <- lmer(MathAch ~ Sex + SES + (1|School) + (1|MEANSES), data = df)
summary(model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: MathAch ~ Sex + SES + (1 | School) + (1 | MEANSES)
##    Data: df
## 
## REML criterion at convergence: 46595.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1769 -0.7375  0.0342  0.7658  2.8048 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept)  4.4722  2.1148  
##  MEANSES  (Intercept)  0.0193  0.1389  
##  Residual             36.8132  6.0674  
## Number of obs: 7185, groups:  School, 160; MEANSES, 150
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  13.2769     0.2025  65.557
## Sex          -1.1874     0.1654  -7.178
## SES           2.3564     0.1054  22.348
## 
## Correlation of Fixed Effects:
##     (Intr) Sex   
## Sex -0.426       
## SES -0.021  0.056